========

SETUP

========

FILEPATH:

# Base URL of the folder that holds the input CSV; the file name is appended
# to this string when the data are read.
filepath <- "https://raw.githubusercontent.com/shabanm2/Utqiagvik/main/Meteorological_Seasons_Data/"

PICK DATE RANGES:

# Years, seasons and sites to include in the analysis.
years <- c("2022", "2023")
seasons <- c("Spring", "Summer", "Fall")
sites <- c("TNHA", "BEO", "SSMH")

# First day (YEAR-MO-DY, day is always 01) of the first month to analyse;
# the data start in June 2022.
date_start <- "2022-06-01"
# Exclusive upper bound: a month is used only when its first day falls before
# this date.
# NOTE(review): the original note says the data end after November 2023, but
# with this value November 2023 itself is not selected -- confirm the intent.
date_end <- "2023-11-01"

PICK OUTPUT:

# Switches for the optional per-PCA outputs produced alongside each plot.
scree <- FALSE # draw a scree plot
eigen <- TRUE # print eigenvectors and eigenvalues

Dates (for selecting date ranges; no need to edit)

# Month names belonging to each meteorological season.
spring_months <- c("March", "April", "May")
summer_months <- c("June", "July", "August")
fall_months <- c("September", "October", "November")
winter_months <- c("December", "January", "February")

# Per-season lookup tables: the month name plus the "-MM-DD" suffix of the
# month's first day (start) and of the following month's first day (end).
spring_dates <- data.frame(
  months = spring_months,
  start = c("-03-01", "-04-01", "-05-01"),
  end = c("-04-01", "-05-01", "-06-01"),
  stringsAsFactors = FALSE
)
summer_dates <- data.frame(
  months = summer_months,
  start = c("-06-01", "-07-01", "-08-01"),
  end = c("-07-01", "-08-01", "-09-01"),
  stringsAsFactors = FALSE
)
fall_dates <- data.frame(
  months = fall_months,
  start = c("-09-01", "-10-01", "-11-01"),
  end = c("-10-01", "-11-01", "-12-01"),
  stringsAsFactors = FALSE
)
winter_dates <- data.frame(
  months = winter_months,
  start = c("-12-01", "-01-01", "-02-01"),
  end = c("-01-01", "-02-01", "-03-01"),
  stringsAsFactors = FALSE
)

# Season name -> lookup table, in the order the seasons are emitted per year.
season_tables <- list(
  Spring = spring_dates,
  Summer = summer_dates,
  Fall = fall_dates,
  Winter = winter_dates
)

# Build all_dates: one row per selected month with columns
#   months (month name), start (inclusive first day), end (exclusive end day,
#   i.e. the first day of the next month) and szn (season name).
# A month is selected when its season is listed in `seasons` and its first day
# lies in [date_start, date_end).
window_start <- as.Date(date_start)
window_end <- as.Date(date_end)
month_rows <- list()
for (yur in years) {
  for (season_name in names(season_tables)) {
    if (!(season_name %in% seasons)) {
      next
    }
    tbl <- season_tables[[season_name]]
    starts <- paste0(yur, tbl$start)

    # A month whose end suffix has a smaller month number than its start
    # suffix (December -> January) ends in the following calendar year.
    # Without this, the December window would end before it starts.
    wraps <- as.integer(substr(tbl$end, 2, 3)) <
      as.integer(substr(tbl$start, 2, 3))
    end_years <- ifelse(
      wraps,
      as.character(as.integer(yur) + 1L),
      as.character(yur)
    )
    ends <- paste0(end_years, tbl$end)

    start_days <- as.Date(starts)
    keep <- start_days >= window_start & start_days < window_end

    month_rows[[length(month_rows) + 1L]] <- data.frame(
      months = tbl$months[keep],
      start = starts[keep],
      end = ends[keep],
      szn = rep(season_name, sum(keep)),
      stringsAsFactors = FALSE
    )
  }
}

if (length(month_rows) > 0) {
  all_dates <- do.call(rbind, month_rows)
} else {
  # Nothing selected: keep the four expected columns so later code that
  # indexes all_dates by name still works.
  all_dates <- data.frame(
    months = character(0),
    start = character(0),
    end = character(0),
    szn = character(0),
    stringsAsFactors = FALSE
  )
}
rownames(all_dates) <- NULL

========

LOADING

========

Packages

library(dplyr)
library(lubridate)
library(tidyverse)

Read File

# Download the gap-filled daily table covering all sites.
file <- "all_sites_daily_gap_filled.csv"
df <- read.csv(paste0(filepath, file))

# Merge the four BEO stations (B05-B08) into a single "BASE" station, both in
# the short station code and in the full "SITE-STATION" name.
df$station <- recode(
  df$station,
  "B05" = "BASE", "B06" = "BASE", "B07" = "BASE", "B08" = "BASE"
)
df$fullname <- recode(
  df$fullname,
  "BEO-B05" = "BEO-BASE", "BEO-B06" = "BEO-BASE",
  "BEO-B07" = "BEO-BASE", "BEO-B08" = "BEO-BASE"
)

# Replace the ground-temperature depth codes (7-17) with depth labels and
# store them as a factor whose levels are listed from shallow to deep.
df$grounddepth <- recode(
  df$grounddepth,
  "7" = "3.5cm", "8" = "10cm", "9" = "20cm", "10" = "30cm", "11" = "40cm",
  "12" = "50cm", "13" = "55cm", "14" = "65cm", "15" = "75cm", "16" = "85cm",
  "17" = "90cm"
)
df$grounddepth <- factor(
  df$grounddepth,
  levels = c("3.5cm", "10cm", "20cm", "30cm", "40cm", "50cm", "55cm", "65cm",
             "75cm", "85cm", "90cm")
)

# Same treatment for the volumetric-water-content depth codes (1-6), which
# become depth-interval labels.
df$vwcdepth <- recode(
  df$vwcdepth,
  "1" = "0:15cm", "2" = "15:30cm", "3" = "30:45cm",
  "4" = "45:60cm", "5" = "60:75cm", "6" = "75:90cm"
)
df$vwcdepth <- factor(
  df$vwcdepth,
  levels = c("0:15cm", "15:30cm", "30:45cm", "45:60cm", "60:75cm", "75:90cm")
)

# Character copy of the day column; the date filters below compare against it.
df$Date <- as.character(df$day)

=========================

DEFINE FUNCTIONS

=========================

Temporal range: season. Vertical spatial range: 30-45 cm. Horizontal spatial range: stations across each site (TNHA, SSMH, BEO) –> site-wide average –> north vs. south (except for BEO).

Filter by Site and Join Tables

# Select the PCA input rows for one site.
#
# Reads the global table `df` and the global season name `szn`.
#
# cursite: site code to keep (compared against df$site), e.g. "TNHA".
#
# Returns a data frame restricted to `cursite`, to ground temperature at
# "30cm" and to VWC at "30:45cm", with columns
#   Date, fullname, site, station, groundtemp, airtemp, windspeed, solar, vwc.
# When `szn` is "Winter" the `solar` column is left out.
#
# History: an earlier version (from PCA-S24.Rmd) read one CSV per variable
# (ground temperature, air temperature, VWC, solar, wind speed) and full-joined
# them on Date/site/station/fullname; the single gap-filled table replaces that.
pick_site <- function(cursite){
  site_df <- df %>%
    filter(site == cursite) %>%
    filter(grounddepth == "30cm") %>%
    filter(vwcdepth == "30:45cm") %>%
    select(Date, fullname, site, station,
           groundtemp, airtemp, windspeed, solar, vwc)

  # The previous code removed `solar` from a global copy only, so the returned
  # table still contained it in Winter; drop it from the returned value.
  if (szn == "Winter") {
    site_df <- site_df %>% select(-solar)
  }

  site_df
}

Filter by Date Range

# Restrict a site table to one date window and clean it for PCA.
#
# datemin: inclusive lower bound, compared against the Date column.
# datemax: exclusive upper bound, compared against the Date column.
# big_df:  table with a Date column (in this script, the output of pick_site,
#          where Date is a character column, so the comparison is string-based).
#
# Returns the rows with datemin <= Date < datemax after removing rows that
# contain any NA and exact duplicate rows. The result is also stored in the
# global `pca_df`, which make_scree() and make_eigen() read.
pick_dates <- function(datemin, datemax, big_df){
  in_window <- big_df %>% filter(Date >= datemin, Date < datemax)

  # Incomplete rows go first, then repeated rows.
  pca_df <<- unique(na.omit(in_window))
  return(pca_df)
}

Calculate PCA

# Run a centred, unit-variance-scaled PCA on the measurement columns.
#
# pca_df: data frame whose columns 5..ncol are the numeric variables to
#         analyse (pick_site puts Date, fullname, site and station in
#         columns 1-4).
#
# Returns list(pca = <prcomp object>, loads = <rotation/loadings matrix>).
# Side effect: the fit, its loadings and its scores are also stored in the
# globals `pca`, `loads` and `scores`; make_pca() reads `scores` and `loads`
# from there.
calc_pca <- function(pca_df){
  # drop = FALSE keeps a data frame (and its column name) even when only one
  # measurement column is present.
  measure_cols <- pca_df[, 5:ncol(pca_df), drop = FALSE]

  fit <- prcomp(measure_cols, center = TRUE, scale. = TRUE)

  # Publish the pieces the plotting code expects to find globally. prcomp
  # already labels the rotation rows with the input column names.
  pca <<- fit
  loads <<- fit$rotation
  scores <<- fit$x

  list("pca" = fit, "loads" = fit$rotation)
}

Make Scree Plot

# Draw a scree plot: percent of total variance explained by each principal
# component, with a red reference line at the share each component would have
# if variance were spread evenly over the input variables (100 / #variables).
#
# pca: a prcomp object (uses $sdev and $rotation).
#
# The variable count is taken from the fit itself (rows of the rotation
# matrix) rather than from the global `pca_df`, so the line always matches the
# PCA being plotted.
make_scree <- function(pca){
  component_var <- pca$sdev^2
  var_percent <- component_var / sum(component_var) * 100
  n_vars <- nrow(pca$rotation)

  barplot(var_percent, xlab = "PC", ylab = "Percent Variance",
          names.arg = seq_along(var_percent),
          las = 1, ylim = c(0, max(var_percent)), col = "gray")
  abline(h = 1 / n_vars * 100, col = "red")
}

Display Eigenvectors and Eigenvalues

# Print the eigenvectors (loadings), the loadings cutoff and the eigenvalues
# of a PCA fit.
#
# pca: a prcomp object (uses $rotation and $sdev).
#
# The cutoff is sqrt(1 / #variables), the threshold the script uses for
# "important" loadings. The variable count comes from the rotation matrix of
# the fit rather than from the global `pca_df`.
#
# Returns the eigenvalues invisibly.
make_eigen <- function(pca){
  eigenvectors <- pca$rotation
  print("Eigenvectors (Loadings):")
  print(eigenvectors)

  # Previously this value was computed but never printed, so the heading
  # appeared in the output with nothing under it.
  loadings_cutoff <- sqrt(1 / nrow(eigenvectors))
  print("Loadings Cutoff:")
  print(loadings_cutoff)

  # Eigenvalues are the variances of the principal components.
  eigenvalues <- (pca$sdev)^2
  print("Eigenvalues:")
  print(eigenvalues)

  invisible(eigenvalues)
}

===============

PCA PLOTS

===============

# Draw the PC1-vs-PC2 plot for one site and month: observations coloured by
# station (south vs. north, or the single BEO group) with the variable
# loadings overlaid as labelled arrows.
#
# pca_df: the rows that were fed to the PCA; `fullname` picks the station of
#         each observation and `Date` supplies the date range in the title.
# szn:    season name shown in the title.
# yr:     year shown in the title.
# site:   one of "TNHA", "SSMH" or "BEO"; any other value is an error.
#
# Reads the globals `scores` and `loads` (set by calc_pca); their rows must
# correspond to the rows/variables of `pca_df`. The station masks are also
# written to the globals SOUTH and NORTH, as before (NORTH only for sites that
# have a north station).
make_pca <- function(pca_df, szn, yr, site){
  # Station full names per site. BEO has a single combined station.
  station_names <- list(
    TNHA = c(south = "TNHA-SA", north = "TNHA-SC"),
    SSMH = c(south = "SSMH-SA", north = "SSMH-SB"),
    BEO = c(south = "BEO-BASE")
  )
  if (!(site %in% names(station_names))) {
    # Previously an unknown site was treated like BEO and then failed midway
    # through plotting on an undefined label; fail before drawing anything.
    stop("Unknown site '", site, "': expected one of ",
         paste(names(station_names), collapse = ", "), call. = FALSE)
  }
  stations <- station_names[[site]]
  has_north <- "north" %in% names(stations)

  south_rows <- pca_df$fullname == stations[["south"]]
  SOUTH <<- south_rows
  if (has_north) {
    north_rows <- pca_df$fullname == stations[["north"]]
    NORTH <<- north_rows
  }

  scaling <- 2     # stretch factor applied to the loading arrows
  textNudge <- 1.1 # places each label slightly beyond its arrow tip
  limNudge <- 1.3  # widens the axis tick range beyond the score extremes

  xlimit <- seq(floor(min(scores[, 1]) * limNudge),
                ceiling(max(scores[, 1]) * limNudge), 1)
  ylimit <- seq(floor(min(scores[, 2]) * limNudge),
                ceiling(max(scores[, 2]) * limNudge), 1)

  # Empty frame first (type = "n"); axes, points and arrows are added below.
  plot(scores[, 1], scores[, 2], xlab = "PCA 1", ylab = "PCA 2", type = "n",
       asp = 1, las = 1, xaxt = "n", yaxt = "n")
  axis(side = 1, at = xlimit)
  axis(side = 2, at = ylimit)

  first_day <- format(as.Date(min(pca_df$Date)), format = "%B %d %Y")
  last_day <- format(as.Date(max(pca_df$Date)), format = "%B %d %Y")
  title(paste0(szn, " ", yr, " Principal Component Analysis: ", site,
               " North v. South\n", first_day, " - ", last_day), adj = 0.5)

  points(scores[south_rows, 1], scores[south_rows, 2],
         pch = 16, cex = 0.7, col = "mediumturquoise")

  if (has_north) {
    points(scores[north_rows, 1], scores[north_rows, 2],
           pch = 16, cex = 0.7, col = "salmon")
    legend(x = "topright",
           legend = c(paste0(stations[["south"]], " (south)"),
                      paste0(stations[["north"]], " (north)")),
           col = c("mediumturquoise", "salmon"),
           pch = 19)
  } else {
    legend(x = "topright",
           legend = "BEO",
           col = "mediumturquoise",
           pch = 19)
  }

  # Loading arrows from the origin, one per variable.
  arrows(0, 0, loads[, 1] * scaling, loads[, 2] * scaling,
         length = 0.1, angle = 20, col = "darkred")

  # Variable labels at the arrow tips. The 2nd label is raised by 0.2 and the
  # 4th shifted left by 0.2 (air temperature and solar in the five-variable
  # layout) to reduce overlap.
  for (k in seq_len(nrow(loads))) {
    x_shift <- if (k == 4) -0.2 else 0
    y_shift <- if (k == 2) 0.2 else 0
    text(loads[k, 1] * scaling * textNudge + x_shift,
         loads[k, 2] * scaling * textNudge + y_shift,
         rownames(loads)[k], col = "darkred", cex = 0.7)
  }

  invisible(NULL)
}
# Run the PCA and produce the requested outputs for every selected month
# (row of all_dates) at every site.
# seq_len() makes the loop a no-op when all_dates is empty; 1:nrow() would
# iterate over c(1, 0) in that case.
for (i in seq_len(nrow(all_dates))) {

  # Not used below; retained so the global stays defined as before.
  month <- all_dates$months[i]
  startdate <- all_dates$start[i]
  enddate <- all_dates$end[i]
  # pick_site() reads `szn` as a global, so it must be set before the call.
  szn <- all_dates$szn[i]
  yr <- substr(startdate, 1, 4)

  for (site in sites) {
    big_df <- pick_site(site)
    pca_df <- pick_dates(startdate, enddate, big_df)

    # Site/month combinations with four or fewer complete rows are skipped.
    if (nrow(pca_df) > 4) {
      p <- calc_pca(pca_df)
      pca <- p$pca
      loads <- p$loads # make_pca() reads this global
      if (isTRUE(scree)) {
        make_scree(pca)
      }
      if (isTRUE(eigen)) {
        make_eigen(pca)
      }
      make_pca(pca_df, szn, yr, site)
    }
  }
}
## [1] "Eigenvectors (Loadings):"
##                    PC1         PC2        PC3        PC4         PC5
## groundtemp  0.58137611  0.29351723 -0.1493024 -0.2625688  0.69614359
## airtemp     0.40081787  0.02227347  0.8921022  0.1909985 -0.08075967
## windspeed   0.35933972 -0.58168040 -0.2763283  0.6616764  0.13546167
## solar      -0.03043223  0.75339953 -0.1486000  0.6341684 -0.08492024
## vwc         0.60934039  0.08595687 -0.2888306 -0.2336492 -0.69519830
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.1866519 1.4752764 0.7964961 0.3521848 0.1893908

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2         PC3        PC4         PC5
## groundtemp  0.4369685  0.4571635  0.34367331  0.6753435  0.16081031
## airtemp     0.5210334  0.3419642  0.02135118 -0.4229239 -0.65746800
## windspeed   0.1931109 -0.7514066  0.40085714  0.2754213 -0.40193653
## solar      -0.5395240  0.2297655 -0.24632106  0.4654549 -0.61546679
## vwc        -0.4573892  0.2380293  0.81244832 -0.2693351 -0.03903239
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.78948323 1.39262821 0.48290688 0.25710851 0.07787317

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2        PC3         PC4        PC5
## groundtemp  0.5733532 -0.3528103 -0.1948071  0.02399744  0.7129272
## airtemp     0.5118822  0.1577140 -0.5141804 -0.48915707 -0.4576536
## windspeed   0.1602001 -0.6559692  0.5681036 -0.37377254 -0.2856444
## solar      -0.2269111 -0.6320616 -0.5456949  0.40659313 -0.2931016
## vwc         0.5762725  0.1444082  0.2777481  0.67462997 -0.3388018
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.7168968 1.1515510 1.0461205 0.5891969 0.4962348

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3        PC4        PC5
## groundtemp -0.4317808 -0.47061294 -0.36001139 -0.6188737 -0.2819147
## airtemp    -0.4224374 -0.56445877 -0.04191413  0.5395834  0.4582859
## windspeed   0.3933992 -0.58981171  0.40694961  0.1856797 -0.5452286
## solar      -0.5368509  0.32495475 -0.06199164  0.4511760 -0.6315004
## vwc         0.4383379 -0.08022666 -0.83617226  0.2963248 -0.1201295
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.2422577 1.2216905 0.7335001 0.5025504 0.3000013

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2        PC3         PC4         PC5
## groundtemp  0.4764745  0.2904409 -0.5748283  0.17488761 -0.57236603
## airtemp     0.5405087  0.2056094 -0.2180673  0.03890125  0.78518050
## windspeed   0.2617445 -0.8390735 -0.2962870 -0.37292141 -0.02427104
## solar       0.4610205  0.2511570  0.5200108 -0.64120602 -0.20693938
## vwc        -0.4469589  0.3259518 -0.5136360 -0.64628591  0.11169476
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.7497899 0.9904860 0.6477077 0.4119337 0.2000827

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3         PC4        PC5
## groundtemp -0.5233938 -0.07169988  0.63660524 -0.10946052  0.5510629
## airtemp    -0.3403392 -0.51822724 -0.68117280 -0.05227358  0.3858508
## windspeed  -0.4980245  0.37157556 -0.18051225 -0.67703942 -0.3506225
## solar      -0.5673328 -0.21025215  0.08639326  0.56562399 -0.5536545
## vwc         0.2008316 -0.73758070  0.30114600 -0.45494284 -0.3434814
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.2766739 1.3332954 0.7480661 0.4410188 0.2009459

## [1] "Eigenvectors (Loadings):"
##                   PC1          PC2        PC3        PC4        PC5
## groundtemp  0.3617158  0.565296864  0.1501319  0.5624719 -0.4590064
## airtemp     0.3170337  0.573836961 -0.5492273 -0.2761577  0.4385055
## windspeed  -0.4152141  0.489213591  0.2738220 -0.6075697 -0.3796681
## solar       0.5413681 -0.334370851 -0.2968998 -0.4086589 -0.5830653
## vwc        -0.5506080  0.004097993 -0.7160181  0.2668691 -0.3360254
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.9334411 1.5230496 0.6571767 0.5561107 0.3302219

## [1] "Eigenvectors (Loadings):"
##                   PC1          PC2       PC3        PC4         PC5
## groundtemp -0.5433419  0.009037075 0.1112112  0.6193898  0.55559543
## airtemp    -0.5580011  0.042859211 0.3009233  0.1374326 -0.75983902
## windspeed  -0.3851808  0.628860298 0.1365009 -0.6071134  0.26258576
## solar      -0.2809988 -0.763843028 0.2919724 -0.4628589  0.19518516
## vwc         0.4075412  0.138419559 0.8906160  0.1210104  0.08312559
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.7999726 1.1077216 0.6403668 0.3451703 0.1067686

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2        PC3         PC4         PC5
## groundtemp -0.5424644  0.08638551 -0.1378263 -0.82218379  0.05733753
## airtemp    -0.3894427  0.59120600  0.3136868  0.30513983  0.55434659
## windspeed  -0.1995913 -0.77789083  0.1326904  0.06794479  0.57691068
## solar      -0.4787500 -0.04268902 -0.7569471  0.43131101 -0.09988979
## vwc         0.5338806  0.18993817 -0.5403969 -0.20064348  0.58873455
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.2874690 1.3456722 0.6761321 0.4485146 0.2422121

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2        PC3           PC4        PC5
## groundtemp -0.5685001 -0.06268284  0.3240193  0.3911702829 -0.6441086
## airtemp    -0.4942970  0.40560349  0.4497682  0.0008752545  0.6235897
## windspeed   0.3946973  0.52459340  0.5035564 -0.4032156174 -0.3909782
## solar      -0.5115169 -0.09488905 -0.2600369 -0.7985494931 -0.1550678
## vwc        -0.1226485  0.73983411 -0.6095341  0.2161502179 -0.1391046
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.2377598 1.1488593 0.8279779 0.5365954 0.2488076

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2           PC3         PC4         PC5
## groundtemp -0.6100555 -0.03055292  5.891908e-05 -0.39894026  0.68391920
## airtemp    -0.5921608 -0.23632312 -1.955389e-01 -0.26822794 -0.69520885
## windspeed  -0.1250899 -0.72119520  5.873843e-01  0.34086190  0.05498098
## solar      -0.4678737  0.27078353 -2.396481e-01  0.80391556  0.06371029
## vwc         0.2064573 -0.59141497 -7.478724e-01  0.08020829  0.20459049
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.38036384 1.15036987 0.87645470 0.53226673 0.06054485

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2        PC3        PC4        PC5
## groundtemp -0.5751975  0.1281761 -0.1650406  0.3702623  0.6988463
## airtemp    -0.3991643  0.5310987 -0.2882705 -0.6760545 -0.1358401
## windspeed  -0.3390595 -0.5553027 -0.6412467  0.1157411 -0.3899798
## solar      -0.5419828  0.1736807  0.5230846  0.3409050 -0.5350289
## vwc         0.3179763  0.6024774 -0.4526002  0.5255892 -0.2341391
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.3755368 1.5164466 0.5207722 0.3184031 0.2688413

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2          PC3        PC4        PC5
## groundtemp -0.4336096 -0.05329431 -0.808533863 -0.3531042  0.1753080
## airtemp    -0.4638532 -0.46754740  0.138403799  0.5620022  0.4808716
## windspeed   0.3024553 -0.87407939 -0.096119098 -0.1789274 -0.3213287
## solar      -0.5379080  0.04962938 -0.008708819  0.2788136 -0.7939641
## vwc        -0.4647565 -0.10991493  0.563739883 -0.6706109  0.0663208
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.6243707 0.9237834 0.6673881 0.4635558 0.3209020

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2         PC3        PC4          PC5
## groundtemp -0.4945852  0.5443194  0.06072952  0.1985534 -0.644973133
## airtemp    -0.5453689  0.2421501  0.18284895  0.2849931  0.727517227
## windspeed  -0.3338391 -0.2690485 -0.88777663  0.1673535 -0.003135147
## solar      -0.4742418 -0.1057386  0.04582009 -0.8727619  0.010062780
## vwc        -0.3487183 -0.7493410  0.41548930  0.2993911 -0.233703344
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.7753998 0.8523685 0.7810439 0.4786796 0.1125082

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2         PC3        PC4         PC5
## groundtemp -0.30484619  0.5275309 -0.66192427 -0.4317323  0.06514137
## airtemp     0.06069248 -0.7194095 -0.65938329  0.1154717  0.17506089
## windspeed  -0.54844298  0.1954132 -0.11237100  0.8042647  0.03943278
## solar      -0.55313646 -0.3346518  0.08028655 -0.2495373 -0.71647292
## vwc         0.54464262  0.2323421 -0.32862862  0.3019319 -0.67098576
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.73460177 1.16528035 0.88182515 0.18826932 0.03002341

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3         PC4        PC5
## groundtemp -0.5754511  0.28654325  0.09263654  0.48645658  0.5844036
## airtemp    -0.2317870  0.77785622  0.14272761 -0.53457488 -0.1872779
## windspeed   0.1937843  0.26185817 -0.91620029 -0.02874316  0.2315790
## solar      -0.4886383 -0.48412160 -0.14291257 -0.63985245  0.3114863
## vwc        -0.5820748 -0.09944468 -0.33346714  0.25952336 -0.6875659
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.2364727 1.2583405 0.9746343 0.3191523 0.2114002

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2        PC3         PC4         PC5
## groundtemp -0.58300374  0.2123251  0.2514027 -0.18831891  0.71858011
## airtemp    -0.56785798  0.2611952  0.2804861 -0.22129942 -0.69402344
## windspeed  -0.01154194  0.7491463 -0.6609457  0.04088021  0.01123119
## solar      -0.32356184 -0.5209764 -0.6157960 -0.49413990 -0.02263423
## vwc        -0.48251266 -0.2325050 -0.2051102  0.81836246 -0.03654660
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.56873353 1.14708989 0.81291897 0.45649420 0.01476341

## [1] "Eigenvectors (Loadings):"
##                    PC1         PC2        PC3        PC4         PC5
## groundtemp -0.12016495  0.69412866 -0.4798235 -0.4997208  0.15425395
## airtemp    -0.08252261  0.67440280  0.6637752  0.3117337 -0.02440014
## windspeed  -0.53592028  0.06852141 -0.4599273  0.6972169  0.10222369
## solar      -0.60839078 -0.07402178  0.1058815 -0.2835029 -0.72992921
## vwc         0.56692196  0.23063361 -0.3262328  0.2943056 -0.65754393
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.57267602 1.59724952 0.51555198 0.29697140 0.01755108

## [1] "Eigenvectors (Loadings):"
##                    PC1         PC2         PC3        PC4        PC5
## groundtemp  0.08637424 -0.77275616  0.13042676 -0.5644290 -0.2445326
## airtemp    -0.66395328  0.08664157 -0.03965365 -0.4758573  0.5688996
## windspeed   0.64983357 -0.13803915 -0.24806153 -0.0727247  0.7013117
## solar      -0.35907757 -0.60285500 -0.28811402  0.6270784  0.1771782
## vwc        -0.02202622  0.11336753 -0.91480721 -0.2376206 -0.3054944
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.9604336 1.4263716 1.1328953 0.3308608 0.1494387

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2        PC3         PC4         PC5
## groundtemp  0.55260211  0.1907890 0.22331044 -0.51750256  0.58357006
## airtemp     0.57509320  0.1094134 0.11832754 -0.17672651 -0.78234445
## windspeed  -0.03991566 -0.6950461 0.71763132 -0.00757395 -0.01629531
## solar      -0.20102296  0.6762371 0.64635835  0.28995818 -0.02093555
## vwc        -0.56735811  0.1060307 0.05794151 -0.78538254 -0.21605396
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.7084715 1.0416776 0.9561650 0.1745618 0.1191241

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2        PC3        PC4         PC5
## groundtemp -0.06063857  0.6678957  0.3108137 0.65002090  0.17636913
## airtemp    -0.63144250  0.1589301 -0.1376075 0.04572657 -0.74497976
## windspeed   0.51761081 -0.3538444 -0.1743425 0.61575261 -0.44421461
## solar      -0.27561807 -0.5266634  0.7914551 0.14140319 -0.01625548
## vwc        -0.50370334 -0.3550724 -0.4771399 0.41980444  0.46508949
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.23911277 1.72684636 0.54766912 0.39193179 0.09443996

## [1] "Eigenvectors (Loadings):"
##                   PC1          PC2         PC3        PC4          PC5
## groundtemp 0.68774889 -0.008083377  0.07644884  0.1204712 -0.711743207
## airtemp    0.67768200 -0.001734315  0.18011341  0.1431588  0.698433121
## windspeed  0.09899956 -0.706193854  0.14998510 -0.6848202  0.003878275
## solar      0.12957900  0.704295725  0.05273102 -0.6959673  0.005074761
## vwc        0.20287175 -0.072037648 -0.96769760 -0.1079027  0.074645747
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.04519058 1.40500973 0.96315075 0.54927019 0.03737874

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2        PC3         PC4         PC5
## groundtemp  0.3752825  0.59376864 -0.1039337 -0.41463060  0.56910553
## airtemp     0.4966456 -0.40594270 -0.2574321  0.55989800  0.45694269
## windspeed  -0.4342070  0.48493444  0.2980482  0.63251122  0.29563442
## solar       0.4643719 -0.07556578  0.8795915 -0.00489644 -0.07030862
## vwc         0.4564310  0.49170899 -0.2457895  0.33838037 -0.61235550
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.4481220 1.6778419 0.5973827 0.2131039 0.0635494

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2        PC3         PC4           PC5
## groundtemp  0.5487333 -0.1885236 0.31986989 -0.33547378  6.696948e-01
## airtemp     0.5798506 -0.1319911 0.09390334 -0.33483767 -7.248569e-01
## windspeed   0.2474898  0.9486602 0.18188335  0.07415932  1.454162e-02
## solar       0.4557186 -0.1902919 0.01795847  0.86936013 -5.878775e-05
## vwc        -0.3061606 -0.1042594 0.92491154  0.11855146 -1.608722e-01
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.65791499 0.89543062 0.85607944 0.54950023 0.04107472

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3        PC4        PC5
## groundtemp  0.1450970 -0.71802429  0.09771796 -0.3415721 -0.5806614
## airtemp    -0.6004558 -0.09764067 -0.23632691  0.6208544 -0.4342908
## windspeed   0.5147493  0.29416686  0.52145759  0.4530718 -0.4138923
## solar      -0.4602201 -0.24019780  0.80022830  0.0306449  0.2986603
## vwc         0.3763380 -0.57504634 -0.14939021  0.5400510  0.4622975
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.1163562 1.5983508 0.6826193 0.3551998 0.2474739

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2        PC3        PC4        PC5
## groundtemp  0.3620549  0.44403615  0.6725537 -0.4534146 -0.1176220
## airtemp     0.5655067  0.04171319 -0.3326951 -0.1902176  0.7291045
## windspeed  -0.1227730 -0.75625220  0.5252191 -0.1476858  0.3396222
## solar      -0.4732758  0.46818415  0.2976250  0.3732842  0.5734910
## vwc         0.5568260 -0.09989057  0.2693506  0.7727097 -0.1016692
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.0864024 1.1717707 0.8745017 0.4584589 0.4088663

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2         PC3        PC4        PC5
## groundtemp  0.01816347  0.8060002 -0.34196490  0.4114429  0.2526035
## airtemp     0.51226663 -0.3030941 -0.31018063  0.5893687 -0.4496103
## windspeed  -0.42390614 -0.1006386 -0.80948875 -0.3019329 -0.2524677
## solar       0.56253028 -0.2173335 -0.35939066 -0.2955753  0.6479193
## vwc        -0.49103590 -0.4484816 -0.04913558  0.5521151  0.5005024
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.1631494 1.2238474 0.8130995 0.5508904 0.2490133

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3         PC4        PC5
## groundtemp  0.4349587  0.09254863  0.80525066  0.28567524 -0.2687130
## airtemp     0.4843886  0.08723854  0.06787056 -0.86192578  0.1011667
## windspeed  -0.4019366 -0.64670282  0.48923892 -0.20936363  0.3701818
## solar      -0.3934180  0.74589599  0.29891202 -0.07028909  0.4411037
## vwc         0.5097528 -0.09611876 -0.13513637  0.35594933  0.7654747
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.9167692 0.7837843 0.5966281 0.4090800 0.2937384

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2         PC3         PC4         PC5
## groundtemp -0.3820949  0.7114474  0.07859378  0.04512477 -0.58278023
## airtemp    -0.3692636 -0.2171028  0.83712534 -0.33410154  0.06409468
## windspeed  -0.4503897 -0.5435733 -0.06545702  0.62392178 -0.32880730
## solar      -0.4698761 -0.2400063 -0.52457781 -0.65957415 -0.10674054
## vwc         0.5422302 -0.3059968  0.11652333 -0.24904511 -0.73263312
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.0673384 1.0575460 0.8630593 0.5514500 0.4606064

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3        PC4          PC5
## groundtemp -0.6216486 -0.04338808 -0.14673477 -0.1948665 -0.743079025
## airtemp    -0.3933375  0.17255525 -0.74248785  0.3539010  0.372795080
## windspeed  -0.1131222 -0.87561028  0.08111082  0.4624492  0.008472475
## solar      -0.3655514  0.41634092  0.58015968  0.5969414  0.010398396
## vwc         0.5589402  0.16825290 -0.28985595  0.5163166 -0.555587510
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.0369958 1.0973884 0.9725535 0.5898867 0.3031755

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3        PC4         PC5
## groundtemp -0.5465201 -0.39589672 -0.24961662  0.2542445 -0.64624518
## airtemp    -0.6159923 -0.06735731 -0.40166906 -0.3329823  0.58634568
## windspeed  -0.2333193  0.68296847  0.03556769 -0.5302914 -0.44344351
## solar      -0.2677192 -0.39731812  0.80177683 -0.3566891  0.01978789
## vwc         0.4424452 -0.46305633 -0.36365144 -0.6450167 -0.20379452
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.0013917 1.6419385 0.8650116 0.3239791 0.1676791

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2          PC3         PC4         PC5
## groundtemp -0.5288896 -0.04553077  0.368543346  0.43192919  0.62913889
## airtemp    -0.5368187 -0.32956021  0.244969468  0.14998584 -0.72160233
## windspeed   0.4781873  0.01523509 -0.002130935  0.85841549 -0.18499476
## solar       0.3834884  0.04014878  0.894360307 -0.22206612 -0.04616257
## vwc        -0.2374341  0.94205785  0.065428299  0.06892685 -0.21707208
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.6999237 0.9424558 0.7199195 0.5131698 0.1245312

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3        PC4         PC5
## groundtemp -0.5545440 -0.04501954  0.15873979  0.4023159 -0.70950530
## airtemp    -0.5152429  0.16807209  0.48099839  0.2508120  0.64188041
## windspeed  -0.1953732 -0.92559197  0.14375249 -0.2775001  0.08624219
## solar      -0.4698729  0.32583337 -0.03340047 -0.8107903 -0.12064663
## vwc         0.4099444  0.08268694  0.84950683 -0.2021092 -0.25019681
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.6526389 0.9883834 0.7169425 0.4655538 0.1764813

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2        PC3         PC4           PC5
## groundtemp -0.1989475 -0.2286524 -0.9094261  0.08778720 -0.2708791887
## airtemp    -0.4681377 -0.4644752  0.2860514  0.69518615  0.0008271347
## windspeed   0.5064199 -0.4897870  0.2066861 -0.07046171 -0.6752507565
## solar      -0.4773710 -0.4957697  0.1270438 -0.70511109  0.1140508766
## vwc        -0.5068756  0.4962882  0.1796092 -0.08284391 -0.6765002545
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.9205803 1.2681201 1.0019530 0.4625297 0.3468170

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2         PC3         PC4          PC5
## groundtemp -0.53669949 -0.3916839  0.09795792 -0.29959251 -0.677632592
## airtemp    -0.46098128  0.5745986 -0.04185849  0.62687243 -0.250223015
## windspeed   0.08359595  0.1112455  0.98992781  0.02603687  0.001080014
## solar      -0.51806711  0.4291820  0.01088336 -0.60200654  0.429975486
## vwc        -0.47335833 -0.5655505  0.09260987  0.39266460  0.541592493
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.3403379 1.0375846 0.9904110 0.3596465 0.2720201

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2         PC3        PC4        PC5
## groundtemp -0.5020008  0.3794915 -0.34701355 -0.5024146 -0.4807729
## airtemp    -0.5031920  0.2950101  0.04926731  0.8024446 -0.1158546
## windspeed  -0.2930101 -0.2138315  0.86621288 -0.1987811 -0.2803257
## solar      -0.6211474 -0.2455283 -0.08332662 -0.1909692  0.7144783
## vwc         0.1520331  0.8142157  0.34624594 -0.1663702  0.4078890
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.7435883 1.1294398 0.9809361 0.7017661 0.4442697

If you want to just make one plot (for testing):

  # look at all_dates and pick a row to use (set to i)

# Row of all_dates (one month) and site to plot.
i <- 8
site <- "TNHA"
stopifnot(i >= 1, i <= nrow(all_dates))

month <- all_dates$months[i]
startdate <- all_dates$start[i]
enddate <- all_dates$end[i]
# pick_site() reads `szn` as a global, so it must be set before the call.
szn <- all_dates$szn[i]
yr <- substr(startdate, 1, 4)

big_df <- pick_site(site)
pca_df <- pick_dates(startdate, enddate, big_df)

# Same minimum-row rule as the main loop. The previous condition,
# nrow(pca_df > 0), had a misplaced parenthesis and only tested for a
# non-empty table.
if (nrow(pca_df) > 4) {
  p <- calc_pca(pca_df)
  pca <- p$pca
  loads <- p$loads # make_pca() reads this global
  if (isTRUE(scree)) {
    make_scree(pca)
  }
  if (isTRUE(eigen)) {
    make_eigen(pca)
  }
  make_pca(pca_df, szn, yr, site)
} else {
  message("Skipping ", site, " ", month, " ", yr, ": only ", nrow(pca_df),
          " complete row(s); more than 4 are required.")
}
## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2        PC3        PC4         PC5
## groundtemp -0.06063857  0.6678957  0.3108137 0.65002090  0.17636913
## airtemp    -0.63144250  0.1589301 -0.1376075 0.04572657 -0.74497976
## windspeed   0.51761081 -0.3538444 -0.1743425 0.61575261 -0.44421461
## solar      -0.27561807 -0.5266634  0.7914551 0.14140319 -0.01625548
## vwc        -0.50370334 -0.3550724 -0.4771399 0.41980444  0.46508949
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.23911277 1.72684636 0.54766912 0.39193179 0.09443996
#==================
## Code Graveyard
#==================
#The following code makes a bi-plot, which is similar to the code above, however it is not grouped at all. It is a bit harder to read, but it is very straightforward code for a simpler PCA.
#   ### Bi-plots
#   dev.new(height=7, width=7)
#   biplot(scores[, 1:2], loadings[, 1:2], cex=0.7)
#NOTE: this is our code graveyard for code that can be used to find other PCA results, including k-means clustering. Above, we use clusters of TNHA-North and TNHA-South. Here, we use k-means clustering to group data points together and compare that to the different stations at TNHA. This can be useful, however we are particularly interested in identifying differences between north- and south-facing sensors.
#   ### K Means Clustering
#   pc_data <- summer_tnha_pca$x
#   
#   # Select the first two principal components
#   pc_to_use <- pc_data[, 1:2]
#   # Run k-means for different numbers of clusters
#   wcss <- numeric()
#   for (k in 1:10) {
#     kmeans_result <- kmeans(pc_to_use, centers = k)
#     wcss[k] <- kmeans_result$tot.withinss
#   }
#   
#   # Plot the results
#   plot(1:10, wcss, type = "b", xlab = "Number of Clusters", ylab = "Within-Cluster Sum of Squares")
#   # We can use 2-3 clusters (where the change in slope drops off)
#   # Perform k-means clustering
#   kmeans_result <- kmeans(pc_to_use, centers = 3)  # Change 'centers' based on your specific case
#   # Add cluster assignments to the data
#   clustered_data <- data.frame(pc_to_use, cluster = kmeans_result$cluster)
#   
#   # Plot the clusters
#   ggplot(clustered_data, aes(x = PC1, y = PC2, color = factor(cluster))) +
#     geom_point() +
#     theme_minimal() +
#     labs(title = "Clusters in Principal Component Space")

Find Each Data Point

#   #Want to look at clustered data
#   
#   cluster_sensors = data.frame(clustered_data, summer_tnha$station)
#   
#   cluster1 = filter(cluster_sensors, cluster == 1)
#   cluster2 = filter(cluster_sensors, cluster == 2)
#   cluster3 = filter(cluster_sensors, cluster == 3)
#   
#   cluster_sensors$cluster = as.factor(cluster_sensors$cluster)
#   
#   ggplot(cluster_sensors, aes(x = summer_tnha.station, fill = cluster)) +
#     geom_bar(position = "dodge") +
#     theme_minimal() +
#     labs(title = "Number of Instances in Each Group",
#          x = "Category",
#          y = "Count")
#   
#   p = ggplot(cluster_sensors, aes(x = PC1, y = PC2)) +
#     geom_point(aes(color = summer_tnha.station, shape = cluster), size = 2) +
#     scale_shape_manual(values = c(16, 17, 18)) +  # Set shapes manually
#     theme_minimal() +
#     labs(title = "Biplot of Eigenvectors and PC Clusters",
#          x = "PC1",
#          y = "PC2") 
#   p + geom_segment(data = as.data.frame(eigenvectors), aes(x = 0, y = 0, xend = PC1*4, yend = PC2*4),
#                    arrow = arrow(type = "closed", length = unit(0.1, "inches")),
#                    linewidth = 0.5, color = "black") +
#     geom_text(data = as.data.frame(eigenvectors), aes(x = PC1*2.5, y = PC2*5+0.1, label = rownames(eigenvectors)))
#   
#